{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "import matplotlib.pyplot as plt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>148.0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>85.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>183.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>89.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>137.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>168.0</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0          6                         148.0            72.0   \n",
       "1          1                          85.0            66.0   \n",
       "2          8                         183.0            64.0   \n",
       "3          1                          89.0            66.0   \n",
       "4          0                         137.0            40.0   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin   BMI  \\\n",
       "0                         35.0          125.0  33.6   \n",
       "1                         29.0          125.0  26.6   \n",
       "2                         29.0          125.0  23.3   \n",
       "3                         23.0           94.0  28.1   \n",
       "4                         35.0          168.0  43.1   \n",
       "\n",
       "   Diabetes_pedigree_function  Age  Target  \n",
       "0                       0.627   50       1  \n",
       "1                       0.351   31       0  \n",
       "2                       0.672   32       1  \n",
       "3                       0.167   21       0  \n",
       "4                       2.288   33       1  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train=pd.read_csv(\"FE_pima-indians-diabetes.csv\")\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train=train['Target']\n",
    "x_train=train.drop([\"Target\"],axis=1)\n",
    "feat_names=x_train.columns\n",
    "from scipy.sparse import csr_matrix\n",
    "x_train=csr_matrix(x_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "lr=LogisticRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of each fold if: [0.50976854 0.52896191 0.47487889 0.45092878 0.48412048]\n",
      "cv logloss is: 0.48973171944649774\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/lingfengyu/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/home/lingfengyu/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/home/lingfengyu/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/home/lingfengyu/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/home/lingfengyu/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "loss=cross_val_score(lr,x_train,y_train,cv=5,scoring='neg_log_loss')\n",
    "print ('logloss of each fold if:',-loss)\n",
    "print ('cv logloss is:',-loss.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
       "                                          fit_intercept=True,\n",
       "                                          intercept_scaling=1, l1_ratio=None,\n",
       "                                          max_iter=100, multi_class='warn',\n",
       "                                          n_jobs=None, penalty='l2',\n",
       "                                          random_state=None, solver='liblinear',\n",
       "                                          tol=0.0001, verbose=0,\n",
       "                                          warm_start=False),\n",
       "             iid='warn', n_jobs=1,\n",
       "             param_grid={'C': [0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2']},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring='neg_log_loss', verbose=0)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "penaltys=['l1','l2']\n",
    "Cs=[0.1,1,10,100,1000]\n",
    "tuned_parameters=dict(penalty=penaltys,C=Cs)\n",
    "\n",
    "lr_penalty=LogisticRegression(solver='liblinear')\n",
    "grid=GridSearchCV(lr_penalty,tuned_parameters,cv=5,scoring='neg_log_loss',n_jobs=1,)\n",
    "grid.fit(x_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.47638123810132\n",
      "{'C': 10, 'penalty': 'l2'}\n"
     ]
    }
   ],
   "source": [
    "print(-grid.best_score_)\n",
    "print(grid.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
       "                                          fit_intercept=True,\n",
       "                                          intercept_scaling=1, l1_ratio=None,\n",
       "                                          max_iter=100, multi_class='warn',\n",
       "                                          n_jobs=None, penalty='l2',\n",
       "                                          random_state=None, solver='liblinear',\n",
       "                                          tol=0.0001, verbose=0,\n",
       "                                          warm_start=False),\n",
       "             iid='warn', n_jobs=1,\n",
       "             param_grid={'C': [0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2']},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring='accuracy', verbose=0)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "penaltys=['l1','l2']\n",
    "Cs=[0.1,1,10,100,1000]\n",
    "tuned_parameters=dict(penalty=penaltys,C=Cs)\n",
    "\n",
    "lr_penalty=LogisticRegression(solver='liblinear')\n",
    "grid=GridSearchCV(lr_penalty,tuned_parameters,cv=5,scoring='accuracy',n_jobs=1,)\n",
    "grid.fit(x_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.7708333333333334\n",
      "{'C': 10, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "print(-grid.best_score_)\n",
    "print(grid.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
